/** * Copyright 2015 StreamSets Inc. * * Licensed under the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.streamsets.pipeline.stage.destination.cassandra; import com.datastax.driver.core.Cluster; import com.datastax.driver.core.LocalDate; import com.datastax.driver.core.ProtocolVersion; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.streamsets.pipeline.api.Field; import com.streamsets.pipeline.api.OnRecordError; import com.streamsets.pipeline.api.Record; import com.streamsets.pipeline.api.StageException; import com.streamsets.pipeline.api.Target; import com.streamsets.pipeline.sdk.RecordCreator; import com.streamsets.pipeline.sdk.TargetRunner; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.thrift.transport.TTransportException; import org.cassandraunit.utils.EmbeddedCassandraServerHelper; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import static org.junit.Assert.fail; public class TestCassandraTarget { private static final Logger LOG = LoggerFactory.getLogger(TestCassandraTarget.class); private static final Double EPSILON = 1e-15; private static final long CASSANDRA_STARTUP_TIMEOUT = 20000; private static final String SAMPLE_TIMEUUID = "474b1386-0379-11e7-bdfe-fa245441bcee"; private static final String SAMPLE_UUID = "46c5379c-a083-4ccd-bfac-c4a8d17574c7"; private static int CASSANDRA_NATIVE_PORT = 9142; private static Cluster cluster = null; private static Session session = null; @SuppressWarnings("unchecked") @BeforeClass public static void setUpClass() throws InterruptedException, TTransportException, ConfigurationException, IOException { EmbeddedCassandraServerHelper.startEmbeddedCassandra(CASSANDRA_STARTUP_TIMEOUT); cluster = Cluster.builder() .addContactPoint("127.0.0.1") .withPort(CASSANDRA_NATIVE_PORT) .withProtocolVersion(ProtocolVersion.V4) .build(); session = cluster.connect(); } @AfterClass public static void tearDownClass() { session.close(); cluster.close(); } @Before public void setUp() { session.execute("CREATE KEYSPACE IF NOT EXISTS test WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };"); session.execute( "CREATE TABLE IF NOT EXISTS test.trips (" + "driver_id int," + "trip_id int," + "time int," + "x double," + "y double," + "dt date," + "ts timestamp," + "time_id timeuuid, " + "unique_id uuid, " + "PRIMARY KEY (driver_id, trip_id)" + ");" ); session.execute( "CREATE TABLE IF NOT EXISTS test.collections (id int, a_list list<int>, a_map map<text, int>, PRIMARY KEY(id));" ); session.execute( "CREATE TABLE IF NOT EXISTS test.test_null_values (a varchar, b varchar, PRIMARY KEY(a));" ); } @After public void tearDown() { session.execute("DROP TABLE IF EXISTS test.test_null_values"); session.execute("DROP TABLE IF EXISTS test.collections"); session.execute("DROP TABLE IF EXISTS test.trips"); session.execute("DROP KEYSPACE IF EXISTS test"); } @Test public void testWriteEmptyBatch() throws InterruptedException, StageException { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "driver_id"), new CassandraFieldMappingConfig("[1]", "trip_id"), new CassandraFieldMappingConfig("[2]", "time"), new CassandraFieldMappingConfig("[3]", "x"), new CassandraFieldMappingConfig("[4]", "y"), new CassandraFieldMappingConfig("[5]", "time_id"), new CassandraFieldMappingConfig("[6]", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); List<Record> emptyBatch = ImmutableList.of(); targetRunner.runInit(); targetRunner.runWrite(emptyBatch); targetRunner.runDestroy(); } @Test public void testWriteSingleRecord() throws InterruptedException, StageException { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "driver_id"), new CassandraFieldMappingConfig("[1]", "trip_id"), new CassandraFieldMappingConfig("[2]", "time"), new CassandraFieldMappingConfig("[3]", "x"), new CassandraFieldMappingConfig("[4]", "y"), new CassandraFieldMappingConfig("[5]", "dt"), new CassandraFieldMappingConfig("[6]", "ts"), new CassandraFieldMappingConfig("[7]", "time_id"), new CassandraFieldMappingConfig("[8]", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); long now = System.currentTimeMillis(); LocalDate dt = LocalDate.fromMillisSinceEpoch(now); Date ts = new Date(); Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create(1)); fields.add(Field.create(2)); fields.add(Field.create(3)); fields.add(Field.create(4.0)); fields.add(Field.create(5.0)); fields.add(Field.create(Field.Type.DATE, new Date(dt.getMillisSinceEpoch()))); fields.add(Field.create(Field.Type.DATETIME, ts)); fields.add(Field.create(SAMPLE_TIMEUUID)); fields.add(Field.create(SAMPLE_UUID)); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should not be any error records. Assert.assertTrue(targetRunner.getErrorRecords().isEmpty()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.trips"); List<Row> allRows = resultSet.all(); Assert.assertEquals(1, allRows.size()); Row row = allRows.get(0); Assert.assertEquals(1, row.getInt("driver_id")); Assert.assertEquals(2, row.getInt("trip_id")); Assert.assertEquals(3, row.getInt("time")); Assert.assertEquals(4.0, row.getDouble("x"), EPSILON); Assert.assertEquals(5.0, row.getDouble("y"), EPSILON); Assert.assertEquals(dt, row.getDate("dt")); Assert.assertEquals(ts, row.getTimestamp("ts")); Assert.assertEquals(SAMPLE_TIMEUUID, row.getUUID("time_id").toString()); Assert.assertEquals(SAMPLE_UUID, row.getUUID("unique_id").toString()); } @Test public void testCollectionTypes() throws InterruptedException, StageException { final String tableName = "test.collections"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "id"), new CassandraFieldMappingConfig("[1]", "a_list"), new CassandraFieldMappingConfig("[2]", "a_map") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create(1)); fields.add(Field.create(ImmutableList.of(Field.create(2)))); fields.add(Field.create(ImmutableMap.of("3", Field.create(4)))); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should not be any error records. Assert.assertTrue(targetRunner.getErrorRecords().isEmpty()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.collections"); List<Row> allRows = resultSet.all(); assert (1 == allRows.size()); Row row = allRows.get(0); assert (row.getInt("id") == 1); assert (row.getList("a_list", Integer.class).contains(2)); assert (row.getMap("a_map", String.class, Integer.class).containsKey("3")); } @Test public void testWriteRecordsOnErrorDiscard() throws Exception { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "driver_id"), new CassandraFieldMappingConfig("[1]", "trip_id"), new CassandraFieldMappingConfig("[2]", "time"), new CassandraFieldMappingConfig("[3]", "x"), new CassandraFieldMappingConfig("[4]", "y"), new CassandraFieldMappingConfig("[5]", "time_id"), new CassandraFieldMappingConfig("[6]", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target) .setOnRecordError(OnRecordError.DISCARD) .build(); Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create(1.3)); fields.add(Field.create(2)); fields.add(Field.create(3)); fields.add(Field.create(4.0)); fields.add(Field.create(5.0)); fields.add(Field.create(SAMPLE_TIMEUUID)); fields.add(Field.create(SAMPLE_UUID)); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should not be any error records if we are discarding. Assert.assertTrue(targetRunner.getErrorRecords().isEmpty()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.trips"); List<Row> allRows = resultSet.all(); Assert.assertEquals(0, allRows.size()); } @Test public void testWriteRecordsOnErrorToError() throws Exception { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "driver_id"), new CassandraFieldMappingConfig("[1]", "trip_id"), new CassandraFieldMappingConfig("[2]", "time"), new CassandraFieldMappingConfig("[3]", "x"), new CassandraFieldMappingConfig("[4]", "y"), new CassandraFieldMappingConfig("[5]", "time_id"), new CassandraFieldMappingConfig("[6]", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target) .setOnRecordError(OnRecordError.TO_ERROR) .build(); Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create(1.3)); fields.add(Field.create(2)); fields.add(Field.create(3)); // intentionally passing doubles where these values are expected to be ints triggers errors! fields.add(Field.create(4.0)); fields.add(Field.create(5.0)); fields.add(Field.create(SAMPLE_TIMEUUID)); fields.add(Field.create(SAMPLE_UUID)); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should have gone to error pipeline Assert.assertEquals(1, targetRunner.getErrorRecords().size()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.trips"); List<Row> allRows = resultSet.all(); Assert.assertEquals(0, allRows.size()); } @Test(expected = StageException.class) public void testWriteRecordsOnErrorStopPipeline() throws Exception { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "driver_id"), new CassandraFieldMappingConfig("[1]", "trip_id"), new CassandraFieldMappingConfig("[2]", "time"), new CassandraFieldMappingConfig("[3]", "x"), new CassandraFieldMappingConfig("[4]", "y"), new CassandraFieldMappingConfig("[5]", "time_id"), new CassandraFieldMappingConfig("[6]", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target) .setOnRecordError(OnRecordError.STOP_PIPELINE) .build(); Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create(1.3)); fields.add(Field.create(2)); fields.add(Field.create(3)); fields.add(Field.create(4.0)); fields.add(Field.create(5.0)); fields.add(Field.create(SAMPLE_TIMEUUID)); fields.add(Field.create(SAMPLE_UUID)); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should have gone to error pipeline Assert.assertEquals(1, targetRunner.getErrorRecords().size()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.trips"); List<Row> allRows = resultSet.all(); Assert.assertEquals(0, allRows.size()); } @Test public void testWriteRecordWithMissingFields() throws InterruptedException, StageException { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("/driver", "driver_id"), new CassandraFieldMappingConfig("/trip", "trip_id"), new CassandraFieldMappingConfig("/time", "time"), new CassandraFieldMappingConfig("/x", "x"), new CassandraFieldMappingConfig("/y", "y"), new CassandraFieldMappingConfig("/time_id", "time_id"), new CassandraFieldMappingConfig("/unique_id", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); Record record = RecordCreator.create(); Map<String, Field> fields = new ImmutableMap.Builder<String, Field>() .put("driver", Field.create(1)) .put("trip", Field.create(2)) .put("time", Field.create(3)) .put("y", Field.create(5.0)) .put("time_id", Field.create(SAMPLE_TIMEUUID)) .put("unique_id", Field.create(SAMPLE_UUID)) .build(); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should not be any error records. Assert.assertTrue(targetRunner.getErrorRecords().isEmpty()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.trips"); List<Row> allRows = resultSet.all(); Assert.assertEquals(1, allRows.size()); Row row = allRows.get(0); Assert.assertEquals(1, row.getInt("driver_id")); Assert.assertEquals(2, row.getInt("trip_id")); Assert.assertEquals(3, row.getInt("time")); Assert.assertEquals(null, row.getBytesUnsafe("x")); Assert.assertEquals(5.0, row.getDouble("y"), EPSILON); Assert.assertEquals(SAMPLE_TIMEUUID, row.getUUID("time_id").toString()); Assert.assertEquals(SAMPLE_UUID, row.getUUID("unique_id").toString()); } @Test(expected = StageException.class) public void testMalformedTableName() throws Exception { List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("/driver", "driver_id"), new CassandraFieldMappingConfig("/trip", "trip_id"), new CassandraFieldMappingConfig("/time", "time"), new CassandraFieldMappingConfig("/x", "x"), new CassandraFieldMappingConfig("/y", "y"), new CassandraFieldMappingConfig("/time_id", "time_id"), new CassandraFieldMappingConfig("/unique_id", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = "tableName"; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); targetRunner.runInit(); fail("should have thrown a StageException!"); } @Test public void testInternalSubBatching() throws Exception { final String tableName = "test.trips"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of( new CassandraFieldMappingConfig("[0]", "driver_id"), new CassandraFieldMappingConfig("[1]", "trip_id"), new CassandraFieldMappingConfig("[2]", "time"), new CassandraFieldMappingConfig("[3]", "x"), new CassandraFieldMappingConfig("[4]", "y"), new CassandraFieldMappingConfig("[5]", "time_id"), new CassandraFieldMappingConfig("[6]", "unique_id") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; conf.maxBatchSize = 35; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); List<Record> records = new ArrayList<>(); for (int i = 0; i < 1000; i++) { Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create(i)); fields.add(Field.create(2)); fields.add(Field.create(3)); fields.add(Field.create(4.0)); fields.add(Field.create(5.0)); fields.add(Field.create(SAMPLE_TIMEUUID)); fields.add(Field.create(SAMPLE_UUID)); record.set(Field.create(fields)); records.add(record); } targetRunner.runInit(); targetRunner.runWrite(records); // Should not be any error records. Assert.assertTrue(targetRunner.getErrorRecords().isEmpty()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); // simple verification that there are as many records as expected ResultSet resultSet = session.execute("SELECT * FROM test.trips"); List<Row> allRows = resultSet.all(); Assert.assertEquals(1000, allRows.size()); } @Test public void testWriteNullValuedColumns() throws Exception { final String tableName = "test.test_null_values"; List<CassandraFieldMappingConfig> fieldMappings = ImmutableList.of(new CassandraFieldMappingConfig("[0]", "a"), new CassandraFieldMappingConfig("[1]", "b") ); CassandraTargetConfig conf = new CassandraTargetConfig(); conf.contactPoints.add("localhost"); conf.port = CASSANDRA_NATIVE_PORT; conf.protocolVersion = ProtocolVersion.V4; conf.useCredentials = false; conf.compression = CassandraCompressionCodec.NONE; conf.columnNames = fieldMappings; conf.qualifiedTableName = tableName; Target target = new CassandraTarget(conf); TargetRunner targetRunner = new TargetRunner.Builder(CassandraDTarget.class, target).build(); Record record = RecordCreator.create(); List<Field> fields = new ArrayList<>(); fields.add(Field.create("abc")); fields.add(Field.create(Field.Type.STRING, null)); record.set(Field.create(fields)); List<Record> singleRecord = ImmutableList.of(record); targetRunner.runInit(); targetRunner.runWrite(singleRecord); // Should not be any error records. Assert.assertTrue(targetRunner.getErrorRecords().isEmpty()); Assert.assertTrue(targetRunner.getErrors().isEmpty()); targetRunner.runDestroy(); ResultSet resultSet = session.execute("SELECT * FROM test.test_null_values"); List<Row> allRows = resultSet.all(); Assert.assertEquals(1, allRows.size()); Row row = allRows.get(0); Assert.assertEquals("abc", row.getString("a")); Assert.assertEquals(null, row.getString("b")); } }